LAGOS Analysis

Loading in data

First download and then specifically grab the locus (or site lat longs)

# Download the LAGOS-NE data into the package's default data folder.
# TRUE is spelled out because the shorthand `T` is an ordinary variable that
# can be reassigned.
lagosne_get(dest_folder = LAGOSNE:::lagos_path(), overwrite = TRUE)

# Load the LAGOS-NE tables
lagos <- lagosne_load()

# Grab the lake centroid info (includes the nhd_lat / nhd_long columns)
lake_centers <- lagos$locus

# Make an sf point object from the centroid coordinates (EPSG:4326)
spatial_lakes <- st_as_sf(lake_centers, coords = c("nhd_long", "nhd_lat"),
                          crs = 4326)

# Grab the water quality data
nutr <- lagos$epi_nutr

Convert to spatial data

# Print a compact summary of the columns in the lake centroid table
utils::str(lake_centers)
## 'data.frame':    141265 obs. of  18 variables:
##  $ lagoslakeid      : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ nhdid            : chr  "50524769" "123632625" "50524793" "135695054" ...
##  $ gnis_name        : chr  NA "Benton Pond" NA "Wedge Pond" ...
##  $ nhd_lat          : num  42.5 42.2 42.5 42.5 42.6 ...
##  $ nhd_long         : num  -73.2 -73 -73.2 -71.1 -70.8 ...
##  $ lake_area_ha     : num  114.95 24.87 75.41 9.26 14 ...
##  $ lake_perim_meters: num  9296 2939 5822 1417 2013 ...
##  $ nhd_fcode        : int  43613 39009 43613 39004 39004 39010 39004 39004 39004 39004 ...
##  $ nhd_ftype        : int  436 390 436 390 390 390 390 390 390 390 ...
##  $ iws_zoneid       : chr  "IWS_45400" "IWS_41585" "IWS_44511" "IWS_42712" ...
##  $ hu4_zoneid       : chr  "HU4_12" "HU4_7" "HU4_12" "HU4_10" ...
##  $ hu6_zoneid       : chr  "HU6_15" "HU6_10" "HU6_15" "HU6_11" ...
##  $ hu8_zoneid       : chr  "HU8_49" "HU8_41" "HU8_49" "HU8_35" ...
##  $ hu12_zoneid      : chr  "HU12_16694" "HU12_16612" "HU12_16694" "HU12_16625" ...
##  $ edu_zoneid       : chr  "EDU_75" "EDU_27" "EDU_75" "EDU_58" ...
##  $ county_zoneid    : chr  "County_319" "County_319" "County_319" "County_326" ...
##  $ state_zoneid     : chr  "State_2" "State_2" "State_2" "State_2" ...
##  $ elevation_m      : num  295.86 447.83 295.86 4.26 13.65 ...
# Build point geometries from the centroid lon/lat columns (EPSG:4326), then
# reproject them to EPSG:2163
spatial_lakes <- st_transform(
  st_as_sf(lake_centers, coords = c('nhd_long', 'nhd_lat'), crs = 4326),
  2163
)

# Take the first 100 lakes for plotting, once with base R indexing and once
# with dplyr
subset_baser <- spatial_lakes[1:100, ]
subset_spatial <- slice(spatial_lakes, 1:100)

# Interactive map of the dplyr subset
mapview(subset_spatial)

Subset to only Minnesota

# State boundary polygons
states <- us_states()

# Uncomment to check that the states loaded
# mapview(states)

# Minnesota outline, reprojected to the CRS used for the lakes (EPSG:2163)
minnesota <- st_transform(filter(states, name == 'Minnesota'), 2163)

# Spatial subset: keep the lakes selected by the Minnesota polygon
minnesota_lakes <- spatial_lakes[minnesota, ]

# Map the 1000 largest Minnesota lakes, coloured by area in hectares
minnesota_lakes %>%
  arrange(desc(lake_area_ha)) %>%
  slice(1:1000) %>%
  mapview(zcol = 'lake_area_ha')

Project Work

Map outline of Iowa and Illinois

# Illinois and Iowa outlines, reprojected to the CRS used for the lakes
ILIA <- states %>%
  filter(name %in% c('Illinois', 'Iowa')) %>%
  st_transform(2163)

# Interactive view of the two state outlines
mapview(ILIA)

# Spatial subset of the lakes by the two-state polygon, plus a natural-log
# area column
ILIA_lakes <- mutate(
  spatial_lakes[ILIA, ],
  lake_area_logha = log(lake_area_ha)
)

# Map the 1000 largest lakes, coloured by area in hectares
ILIA_lakes %>%
  arrange(desc(lake_area_ha)) %>%
  slice(1:1000) %>%
  mapview(zcol = 'lake_area_ha')

Subset LAGOS data to these sites, how many sites are in Illinois and Iowa combined? How does this compare to Minnesota?

Minnesota has many more lakes (29,038) than Illinois and Iowa combined (16,466). That’s more than the land of 10,000 lakes!

# Number of distinct lake ids in Minnesota
n_distinct(minnesota_lakes$lagoslakeid)
## [1] 29038
# Number of distinct lake ids in Illinois and Iowa combined
n_distinct(ILIA_lakes$lagoslakeid)
## [1] 16466

What is the distribution of lake size in Iowa vs. Minnesota?

There are many more lakes in Minnesota than Iowa. Minnesota lakes are much larger on average and overall have a larger spread of lake size.

# Iowa outline in the lakes' CRS, and the lakes selected by that polygon
iowa <- st_transform(filter(states, name == 'Iowa'), 2163)
iowa_lakes <- spatial_lakes[iowa, ]

# Total lake area (ha) in each state
sum(iowa_lakes$lake_area_ha)
## [1] 51181.6
sum(minnesota_lakes$lake_area_ha)
## [1] 1208191
# Tag each state's lakes with the state name and that state's total lake
# area, stack the two tables, then add the natural log of lake area
MNIA_lakes <- rbind(
  mutate(iowa_lakes, state = "Iowa", total_area = sum(lake_area_ha)),
  mutate(minnesota_lakes, state = "Minnesota",
         total_area = sum(lake_area_ha))
) %>%
  mutate(lake_area_logha = log(lake_area_ha))

# Histogram of log lake area, one panel per state
ggplot(MNIA_lakes, aes(lake_area_logha)) +
  geom_histogram(bins = 25) +
  facet_wrap(~state)

# Boxplot of log lake area per state, flipped so area runs along the y axis
ggplot(MNIA_lakes, aes(lake_area_logha)) +
  geom_boxplot() +
  facet_wrap(~state) +
  coord_flip()

Plot lakes in Iowa and Illinois and color them by lake area in hectares

# Map every Illinois and Iowa lake (no row limit here), ordered from largest
# to smallest and coloured by the natural log of lake area in hectares
mapview(
  arrange(ILIA_lakes, desc(lake_area_logha)),
  zcol = 'lake_area_logha'
)

What other data sources might we use to understand how reservoirs and natural lakes vary in size in these three states?

This analysis has potential to be improved by incorporating other data!

Other data sources could include state records of lakes, satellite imagery of lake boundaries over time, property records, or other research efforts.

It would also be interesting to see how waterbody sizes differ between reservoirs and natural lakes, and what percentage of lakes are natural in each of the three states. I’d be curious to see whether the definition of a “lake” varies among these states as well. I’d also like to know how these numbers have changed over time, whether through the creation or destruction of lakes or through natural changes to lake boundaries.

Subset columns nutr to only keep key info that we want

# Keep the lake id, sample date and the three water-clarity measurements
clarity_cols <- select(nutr, lagoslakeid, sampledate, chla, doc, secchi)

# Re-parse the sample date from its character form with lubridate's ymd()
clarity_only <- mutate(
  clarity_cols,
  sampledate = ymd(as.character(sampledate))
)

Keep sites with at least 200 observations

# Look at the number of rows of dataset
# nrow(clarity_only)

# Keep only samples where both chlorophyll a and Secchi depth were recorded
chla_secchi <- clarity_only %>%
  filter(!is.na(chla),
         !is.na(secchi))

# How many observations did we lose?
# nrow(clarity_only) - nrow(chla_secchi)


# Keep only the lakes with at least 200 paired observations of secchi and
# chla. The comparison is `>=` so that lakes with exactly 200 observations
# are kept, matching "at least 200". The per-lake tally is stored in `count`.
chla_secchi_200 <- chla_secchi %>%
  group_by(lagoslakeid) %>%
  mutate(count = n()) %>%
  filter(count >= 200) %>%
  # Drop the grouping so later steps do not silently operate per lake
  ungroup()

Join water quality data to spatial data

# Join the lake points to the lakes that passed the observation filter.
# distinct() reduces the sample table to one row per lake id, so each lake
# appears once in the result; the sample columns carried along come from
# that single retained row.
# TRUE is spelled out because the shorthand `T` can be reassigned.
spatial_200 <- inner_join(
  spatial_lakes,
  distinct(chla_secchi_200, lagoslakeid, .keep_all = TRUE),
  by = "lagoslakeid"
)

Mean Chl_a map

### Take the mean chl_a and secchi by lake

mean_values_200 <- chla_secchi_200 %>%
  # Take summary by lake id
  group_by(lagoslakeid) %>%
  # Mean chl_a and mean Secchi depth per lake id, ignoring missing values
  # (TRUE spelled out: the shorthand `T` can be reassigned)
  summarize(mean_chl = mean(chla, na.rm = TRUE),
            mean_secchi = mean(secchi, na.rm = TRUE)) %>%
  # Get rid of lakes whose mean could not be computed
  filter(!is.na(mean_chl),
         !is.na(mean_secchi)) %>%
  # Take the log base 10 of the mean_chl
  mutate(log10_mean_chl = log10(mean_chl))

# Join the per-lake means onto the lake points
mean_spatial <- inner_join(spatial_lakes, mean_values_200,
                           by = "lagoslakeid")

# Make a map coloured by log10 mean chlorophyll a
mapview(mean_spatial, zcol = "log10_mean_chl")

What is the correlation between Secchi Disk Depth and Chlorophyll a for sites with at least 200 observations?

Secchi disk depth is negatively, and roughly exponentially, correlated with chlorophyll a. As chlorophyll increases, there comes a point where very little light is able to penetrate the water beyond a short distance. Past that point, it does not matter how much additional chlorophyll is in the water, as the light cannot penetrate anyway.

# Plot mean Secchi disk depth against mean chlorophyll a, one point per lake.
# The question is about sites with at least 200 observations, so the per-lake
# means are taken from the filtered table (chla_secchi_200) rather than from
# every lake in chla_secchi.
ggplot(chla_secchi_200 %>%
         group_by(lagoslakeid) %>%
         summarise(meanchla = mean(chla),
                   meansecchi = mean(secchi)),
       aes(meanchla, meansecchi)) +
  geom_point()

Which states have the most data?

# Count the rows for each lake id, carrying its coordinates and state zone id
lake_centers <- summarise(
  group_by(lake_centers, lagoslakeid, nhd_long, nhd_lat, state_zoneid),
  n = n()
)
## `summarise()` has grouped output by 'lagoslakeid', 'nhd_long', 'nhd_lat'. You
## can override using the `.groups` argument.
# Attach the state table (which supplies state_name) via the state zone id

lake_states <- lagos$state
lake_statecenters <- lake_centers %>%
  left_join(lake_states, by = "state_zoneid")

# Total the per-lake counts within each state, sort from largest to smallest,
# and drop rows containing missing values
lake_obsn <- arrange(
  summarise(group_by(lake_statecenters, state_name), n = sum(n)),
  desc(n)
) %>%
  drop_na()

# State polygons with a state_name column to join on
states <- mutate(us_states(), state_name = name)

# Print the table of counts
kable(lake_obsn)
state_name n
Minnesota 29022
Michigan 15569
Wisconsin 13790
New York 11950
Illinois 11805
Missouri 9116
Indiana 7942
Ohio 6120
Pennsylvania 5922
Maine 5518
Iowa 4636
Massachusetts 3912
New Jersey 3333
New Hampshire 2544
Connecticut 2025
Vermont 1626
Rhode Island 618
# Map the counts by state: attach each state's polygon to its count and drop
# rows containing missing values
lake_statecenterboundaries <- lake_obsn %>%
  left_join(states, by = "state_name") %>%
  drop_na()

# Convert the joined table to an sf object
lake_countmap <- st_as_sf(lake_statecenterboundaries)

# Interactive map coloured by the count column
mapview(lake_countmap, zcol = 'n')

Is there a spatial pattern in Secchi disk depth for lakes with at least 200 observations?

Lakes that were farther from urban areas tended to have greater Secchi disk depths, indicating clearer water. This could be due to nutrient runoff from densely populated areas leading to high chlorophyll a concentrations and lower visibility in the water.

# Attach lake coordinates to every retained sample and build point geometries
# (one point per sample row, so a lake appears once for each of its samples).
# The coordinate columns are longitude/latitude, so the CRS is declared as
# EPSG:4326, the same CRS this file uses when building spatial_lakes from
# these columns. Without `crs` the resulting object has no CRS.
spatial_200 <- st_as_sf(
  left_join(chla_secchi_200, lake_centers, by = "lagoslakeid"),
  coords = c("nhd_long", "nhd_lat"),
  crs = 4326
)

# Interactive map coloured by the Secchi depth of each sample
mapview(spatial_200, zcol = "secchi")